// This file is part of Notepad4.
// See License.txt for details about distribution and modification.
//! Lexer for Nim

#include <cassert>
#include <cstring>

#include <string>
#include <string_view>
#include <vector>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "StringUtils.h"
#include "LexerModule.h"

using namespace Lexilla;

namespace {

// Report whether a string style belongs to the formatted-string family.
// The decision is made purely on the low bit of the style value: a style counts
// as "formatted" when its parity equals the parity of SCE_NIM_FMTSTRING.
// NOTE(review): this relies on every formatted-string style sharing that parity
// in SciLexer.h -- confirm when renumbering styles.
constexpr bool IsFmtString(int state) noexcept {
	constexpr bool fmtStyleIsOdd = (SCE_NIM_FMTSTRING & 1) != 0;
	const bool stateIsOdd = (state & 1) != 0;
	return stateIsOdd == fmtStyleIsOdd;
}

// True for the two triple-quoted string styles (plain and formatted), false otherwise.
constexpr bool IsTripleString(int state) noexcept {
	switch (state) {
	case SCE_NIM_TRIPLE_STRING:
	case SCE_NIM_TRIPLE_FMTSTRING:
		return true;

	default:
		return false;
	}
}

// https://nim-lang.org/docs/manual.html#lexical-analysis-string-literals
// Tracks one escape sequence (or other short highlighted run) inside a string
// so the lexer knows when to return to the enclosing string style.
struct EscapeSequence {
	// style to restore once the escape sequence is finished
	int outerState = SCE_NIM_DEFAULT;
	// countdown consumed by atEscapeEnd(); the sequence ends when it reaches zero
	int digitsLeft = 0;
	// true: following characters must be hex digits; false: decimal digits
	bool hex = false;
	// set by the caller for the \u{H+} form, where the digits are delimited by braces
	bool brace = false;

	// Start a backslash escape; chNext is the character right after the backslash.
	// highlight any character as escape sequence.
	void resetEscapeState(int state, int chNext) noexcept {
		outerState = state;
		digitsLeft = 1;
		hex = true;
		brace = false;
		if (chNext == 'x') {
			// \xHH
			digitsLeft = 3;
		} else if (chNext == 'u') {
			// \uHHHH
			digitsLeft = 5;
		} else if (IsADigit(chNext)) {
			// \ followed by decimal digits
			digitsLeft = 7;
			hex = false;
		}
	}
	// Start a fixed two-character sequence (used for doubled quotes and braces).
	void resetEscapeState(int state) noexcept {
		outerState = state;
		digitsLeft = 1;
		hex = false;
		brace = false;
	}
	// Consume one character of the sequence; returns true when ch is no longer part of it.
	bool atEscapeEnd(int ch) noexcept {
		--digitsLeft;
		if (!IsDecimalOrHex(ch, hex)) {
			return true;
		}
		// \u{H+}: every hex digit up to the closing brace belongs to the escape,
		// so the digit budget of the plain \uHHHH form must not cut it short.
		return !brace && digitsLeft <= 0;
	}
};

// Saved context for one interpolated expression `{...}` inside a formatted string.
struct FormatStringState {
	// style of the enclosing formatted string, restored when the expression ends
	int state;
	// nesting depth of '(' ... ')' seen inside the expression
	int parenCount;
};

// Which part of an interpolated expression the lexer has just switched back into
// the string style for.
enum class FormatStringPart {
	// not inside the tail of an interpolated expression
	None,
	// a top-level ':' or '=' was seen; a format specifier may follow
	FormatSpec,
	// the format specifier was consumed, or the closing '}' was reached
	End,
};

// https://nim-lang.org/docs/strformat.html
// True when ch is one of the presentation type letters accepted at the end of
// a standard format specifier.
constexpr bool IsBraceFormatSpecifier(char ch) noexcept {
	switch (ch) {
	// integer presentation types
	case 'b':
	case 'd':
	case 'o':
	case 'x':
	case 'X':
	// floating point presentation types
	case 'e':
	case 'E':
	case 'f':
	case 'F':
	case 'g':
	case 'G':
		return true;

	default:
		return false;
	}
}

// Measure the standard format specifier that starts at sc.currentPos.
// The character at sc.currentPos itself (the ':' at the call site in this file)
// is always counted; scanning then follows the grammar
//   [[fill]align][sign][#][0][width][.precision][type]
// Returns the number of bytes from sc.currentPos up to (not including) the first
// character that is not part of the specifier, so the result is at least 1.
inline Sci_Position CheckBraceFormatSpecifier(const StyleContext &sc, LexAccessor &styler) noexcept {
	Sci_PositionU pos = sc.currentPos + 1;
	char ch = styler[pos];
	// [[fill] align]
	if (!AnyOf(ch, '\r', '\n', '{', '}')) {
		Sci_Position width = 1;
		if (ch & 0x80) {
			// non-ASCII fill character: ask the styler how many bytes it occupies
			styler.GetCharacterAndWidth(pos, &width);
		}
		const char chNext = styler[pos + width];
		if (AnyOf(chNext, '<', '>', '^')) {
			// fill character followed by align: skip both
			pos += 1 + width;
			ch = styler[pos];
		} else if (AnyOf(ch, '<', '>', '^')) {
			// align without fill: skip only the align character, otherwise the
			// character after it (possibly the closing '}') would be swallowed
			ch = styler[++pos];
		}
	}
	// [sign][#]
	if (ch == '+' || ch == '-' || ch == ' ') {
		ch = styler[++pos];
	}
	if (ch == '#') {
		ch = styler[++pos];
	}
	// [0][width]
	while (IsADigit(ch)) {
		ch = styler[++pos];
	}
	// [.precision]
	if (ch == '.') {
		ch = styler[++pos];
		while (IsADigit(ch)) {
			ch = styler[++pos];
		}
	}
	// [type]
	if (IsBraceFormatSpecifier(ch)) {
		++pos;
	}
	return pos - sc.currentPos;
}

//KeywordIndex++Autogenerated -- start of section automatically generated
enum {
	// index into keywordLists for the keyword word list
	KeywordIndex_Keyword = 0,
	// index into keywordLists for the type word list
	KeywordIndex_Type = 1,
	// size of the buffer used to copy an identifier for word list lookup
	MaxKeywordSize = 16,
};
//KeywordIndex--Autogenerated -- end of section automatically generated

// Style to give the next plain identifier, chosen from the construct seen before it.
// Each enumerator equals the SCE_NIM_* style it maps to, so the lexer can cast it
// directly to a style.
enum class KeywordType {
	// no pending restyle
	None = SCE_NIM_DEFAULT,
	// set after the pragma opener "{."
	Pragma = SCE_NIM_PRAGMA,
	// set after the keyword "type"
	Type = SCE_NIM_TYPE,
	// set after proc, func, method, iterator, macro, template or converter
	Function = SCE_NIM_FUNCTION_DEFINITION,
};

// Lexer entry point for Nim.
// Styles the text starting at startPos for lengthDoc bytes, beginning in style
// initStyle, and stores a line state for every line that ends inside the range.
//   keywordLists  word lists indexed by KeywordIndex_Keyword / KeywordIndex_Type
//   styler        document accessor used for reading text and writing styles / line states
// Line state layout written here: bits 8..15 hold the nested block comment level,
// bits 16 and above hold the indentation count, the low bits hold PyLineState* flags.
void ColouriseNimDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList keywordLists, Accessor &styler) {
	// style pending for the next plain identifier
	KeywordType kwType = KeywordType::None;
	// nesting depth of #[ ... ]# / ##[ ... ]## block comments
	int commentLevel = 0;
	// non-zero once a non-space character was seen on the current line
	int visibleChars = 0;
	// indentation of the current line, counted until the first visible character
	int indentCount = 0;
	// flags collected for the current line
	int lineState = 0;
	FormatStringPart fmtPart = FormatStringPart::None;
	EscapeSequence escSeq;
	// one entry per currently open interpolated expression
	std::vector<FormatStringState> nestedState;

	if (startPos != 0) {
		// back up to the line where the expression inside a formatted string literal starts.
		BacktrackToStart(styler, PyLineStateStringInterpolation, startPos, lengthDoc, initStyle);
	}

	StyleContext sc(startPos, lengthDoc, initStyle, styler);
	if (sc.currentLine > 0) {
		// only the comment nesting level is carried over from the previous line
		lineState = styler.GetLineState(sc.currentLine - 1);
		commentLevel = (lineState >> 8) & 0xff;
		lineState = 0;
	}

	while (sc.More()) {
		// first: decide whether the current style ends at this character
		switch (sc.state) {
		case SCE_NIM_OPERATOR:
		case SCE_NIM_OPERATOR2:
			// operators are styled one character at a time
			sc.SetState(SCE_NIM_DEFAULT);
			break;

		case SCE_NIM_NUMBER:
			if (sc.ch == '\'' && IsIdentifierStartEx(sc.chNext)) {
				// keep a quote followed by an identifier start inside the number (type suffix)
				sc.Forward();
			} else if (!IsDecimalNumber(sc.chPrev, sc.ch, sc.chNext)) {
				sc.SetState(SCE_NIM_DEFAULT);
			}
			break;

		case SCE_NIM_IDENTIFIER:
			if (!IsIdentifierCharEx(sc.ch)) {
				// identifier complete: classify it
				char s[MaxKeywordSize];
				sc.GetCurrent(s, sizeof(s));
				if (keywordLists[KeywordIndex_Keyword].InList(s)) {
					sc.ChangeState(SCE_NIM_WORD);
					// some keywords determine the style of the identifier that follows
					kwType = KeywordType::None;
					if (StrEqual(s, "type")) {
						kwType = KeywordType::Type;
					} else if (StrEqualsAny(s, "proc", "func", "method", "iterator", "macro", "template", "converter")) {
						kwType = KeywordType::Function;
					}
				} else if (keywordLists[KeywordIndex_Type].InList(s)) {
					sc.ChangeState(SCE_NIM_TYPE);
				} else if (kwType != KeywordType::None) {
					// enumerator values are the style numbers
					sc.ChangeState(static_cast<int>(kwType));
				} else if (sc.GetLineNextChar() == '(') {
					sc.ChangeState(SCE_NIM_FUNCTION);
				}
				if (sc.state != SCE_NIM_WORD) {
					// the pending style applies to a single identifier only
					kwType = KeywordType::None;
				}
				if (sc.ch == '\"') {
					// identifier immediately followed by a quote: string with a prefix
					if (sc.MatchNext('\"', '\"')) {
						if (StrEqual(s, "fmt")) {
							// the prefix becomes part of the string style
							sc.ChangeState(SCE_NIM_TRIPLE_FMTSTRING);
						} else {
							sc.SetState(SCE_NIM_TRIPLE_STRING);
						}
						// step onto the third quote; the Forward() at the loop end moves past it
						sc.Forward(2);
					} else if (s[1] == '\0' && UnsafeUpper(s[0]) == 'R') {
						// r"..." or R"..."
						sc.ChangeState(SCE_NIM_RAWSTRING);
					} else if (StrEqual(s, "fmt")) {
						sc.ChangeState(SCE_NIM_RAWFMTSTRING);
					} else {
						// any other prefix keeps its own style, the string is raw
						sc.SetState(SCE_NIM_RAWSTRING);
					}
				} else {
					sc.SetState(SCE_NIM_DEFAULT);
				}
			}
			break;

		case SCE_NIM_CHARACTER:
		case SCE_NIM_STRING:
		case SCE_NIM_FMTSTRING:
		case SCE_NIM_RAWSTRING:
		case SCE_NIM_RAWFMTSTRING:
		case SCE_NIM_TRIPLE_STRING:
		case SCE_NIM_TRIPLE_FMTSTRING:
			if (sc.atLineStart && sc.state < SCE_NIM_TRIPLE_STRING) {
				// styles below the triple-quoted ones end at the line end, unless the
				// lexer has just switched back from an interpolated expression on this line
				if (fmtPart == FormatStringPart::None) {
					sc.SetState(SCE_NIM_DEFAULT);
					break;
				}
			}
			if (sc.ch == '\\') {
				// backslash escapes are only highlighted in styles up to SCE_NIM_FMTSTRING
				if (sc.state <= SCE_NIM_FMTSTRING) {
					escSeq.resetEscapeState(sc.state, sc.chNext);
					sc.SetState(SCE_NIM_ESCAPECHAR);
					sc.Forward();
					if (sc.Match('u', '{')) {
						// \u{...}: skip the opening brace as well
						escSeq.brace = true;
						sc.Forward();
					}
				}
			} else if (sc.ch == ((sc.state == SCE_NIM_CHARACTER) ? '\'' : '\"')) {
				// the quote character that matches the current literal kind
				if (sc.chNext == '\"' && (sc.state == SCE_NIM_RAWSTRING || sc.state == SCE_NIM_RAWFMTSTRING)) {
					// "" inside a raw string is highlighted as an escape
					escSeq.resetEscapeState(sc.state);
					sc.SetState(SCE_NIM_ESCAPECHAR);
					sc.Forward();
				} else if (sc.state < SCE_NIM_TRIPLE_STRING || sc.MatchNext('\"', '\"')) {
					if (sc.state >= SCE_NIM_TRIPLE_STRING) {
						// quotes except last three are string content
						while (sc.chNext == '\"') {
							sc.Forward();
						}
					}
					sc.ForwardSetState(SCE_NIM_DEFAULT);
				}
			} else if (sc.state != SCE_NIM_CHARACTER) {
				if (IsFmtString(sc.state)) {
					if (sc.ch == '{') {
						if (sc.chNext == '{') {
							// {{ is highlighted as an escape
							escSeq.resetEscapeState(sc.state);
							sc.SetState(SCE_NIM_ESCAPECHAR);
							sc.Forward();
						} else {
							// start of an interpolated expression: remember the string style
							fmtPart = FormatStringPart::None;
							nestedState.push_back({sc.state, 0});
							sc.SetState(SCE_NIM_OPERATOR2);
						}
					} else if (sc.ch == '}') {
						if (nestedState.empty()) {
							if (sc.chNext == '}') {
								// }} is highlighted as an escape
								escSeq.resetEscapeState(sc.state);
								sc.SetState(SCE_NIM_ESCAPECHAR);
								sc.Forward();
							}
						} else {
							// end of the interpolated expression: style the brace as an
							// operator and continue in the string style
							fmtPart = FormatStringPart::None;
							const int state = sc.state;
							nestedState.pop_back();
							sc.SetState(SCE_NIM_OPERATOR2);
							sc.ForwardSetState(state);
							continue;
						}
					} else if (sc.ch == ':' && fmtPart == FormatStringPart::FormatSpec) {
						// format specifier after the expression
						fmtPart = FormatStringPart::End;
						const Sci_Position length = CheckBraceFormatSpecifier(sc, styler);
						if (length != 0) {
							const int state = sc.state;
							sc.SetState(SCE_NIM_FORMAT_SPECIFIER);
							sc.Advance(length);
							sc.SetState(state);
							continue;
						}
					}
				} else if (sc.ch == '$') {
					// $# and $<digits> placeholders in non-formatted strings
					if (sc.chNext == '#' || IsADigit(sc.chNext)) {
						escSeq.outerState = sc.state;
						sc.SetState(SCE_NIM_PLACEHOLDER);
						sc.Forward();
					}
				}
			}
			break;

		case SCE_NIM_ESCAPECHAR:
			if (escSeq.atEscapeEnd(sc.ch)) {
				if (escSeq.brace && sc.ch == '}') {
					// include the closing brace of \u{...}
					sc.Forward();
				}
				// re-examine the current character in the enclosing string style
				sc.SetState(escSeq.outerState);
				continue;
			}
			break;

		case SCE_NIM_PLACEHOLDER:
			if (!IsADigit(sc.ch)) {
				// re-examine the current character in the enclosing string style
				sc.SetState(escSeq.outerState);
				continue;
			}
			break;

		case SCE_NIM_BACKTICKS:
			if (sc.ch == '`') {
				if (kwType != KeywordType::None) {
					// a backticked name takes the pending style like a plain identifier
					sc.ChangeState(static_cast<int>(kwType));
					kwType = KeywordType::None;
				}
				sc.ForwardSetState(SCE_NIM_DEFAULT);
			}
			break;

		case SCE_NIM_COMMENTLINE:
		case SCE_NIM_COMMENTLINEDOC:
			if (sc.atLineStart) {
				sc.SetState(SCE_NIM_DEFAULT);
			}
			break;

		case SCE_NIM_COMMENT:
		case SCE_NIM_COMMENTDOC:
			if (sc.atLineStart) {
				// a line that starts inside a block comment is flagged as a comment line
				lineState = PyLineStateMaskCommentLine;
			}
			// nested opener: #[ inside a plain block comment, ##[ inside a doc block comment
			if (sc.ch == '#' && ((sc.state == SCE_NIM_COMMENT) ? (sc.chNext == '[') : sc.MatchNext('#', '['))) {
				++commentLevel;
				sc.Forward((sc.state == SCE_NIM_COMMENT) ? 1 : 2);
			// closer: ]# for a plain block comment, ]## for a doc block comment
			} else if (sc.Match(']', '#') && (sc.state == SCE_NIM_COMMENT || sc.GetRelative(2) == '#')) {
				sc.Forward((sc.state == SCE_NIM_COMMENT) ? 1 : 2);
				--commentLevel;
				if (commentLevel == 0) {
					sc.ForwardSetState(SCE_NIM_DEFAULT);
					// text after the comment on the same line: drop the comment line flag
					if (lineState == PyLineStateMaskCommentLine && sc.GetLineNextChar() != '\0') {
						lineState = 0;
					}
				}
			}
			break;
		}

		// second: in the default style, decide which style starts at this character
		if (sc.state == SCE_NIM_DEFAULT) {
			if (sc.ch == '#') {
				if (visibleChars == 0) {
					lineState = PyLineStateMaskCommentLine;
				}
				if (sc.chNext == '#') {
					// ## doc line comment, or ##[ doc block comment
					sc.SetState(SCE_NIM_COMMENTLINEDOC);
					sc.Forward();
					if (sc.chNext == '[') {
						commentLevel = 1;
						sc.ChangeState(SCE_NIM_COMMENTDOC);
						sc.Forward();
					}
				} else if (sc.chNext == '[') {
					// #[ block comment
					commentLevel = 1;
					sc.SetState(SCE_NIM_COMMENT);
					sc.Forward();
				} else {
					sc.SetState(SCE_NIM_COMMENTLINE);
				}
			} else if (sc.ch == '\"') {
				// a quote directly after '&' starts a formatted string
				sc.SetState((sc.chPrev == '&') ? SCE_NIM_FMTSTRING : SCE_NIM_STRING);
				if (sc.MatchNext('\"', '\"')) {
					// the triple-quoted styles sit at the same offset from both single-quoted styles
					static_assert(SCE_NIM_TRIPLE_STRING - SCE_NIM_STRING == SCE_NIM_TRIPLE_FMTSTRING - SCE_NIM_FMTSTRING);
					sc.ChangeState(sc.state + SCE_NIM_TRIPLE_STRING - SCE_NIM_STRING);
					sc.Forward(2);
				}
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_NIM_CHARACTER);
			} else if (sc.ch == '`') {
				sc.SetState(SCE_NIM_BACKTICKS);
			} else if (IsADigit(sc.ch)) {
				sc.SetState(SCE_NIM_NUMBER);
			} else if (IsIdentifierStartEx(sc.ch)) {
				sc.SetState(SCE_NIM_IDENTIFIER);
			} else if (IsAGraphic(sc.ch)) {
				// operators inside an interpolated expression get their own style
				const bool interpolating = !nestedState.empty();
				sc.SetState(interpolating ? SCE_NIM_OPERATOR2 : SCE_NIM_OPERATOR);
				if (sc.Match('{', '.')) {
					// pragma opener: the next identifier is styled as a pragma
					kwType = KeywordType::Pragma;
				} else if (interpolating) {
					FormatStringState &state = nestedState.back();
					if (sc.ch == '(') {
						state.parenCount += 1;
					} else if (sc.ch == ')') {
						state.parenCount -= 1;
					} else if (state.parenCount <= 0 && (sc.ch == ':' || sc.ch == '=' || (sc.ch == '}' && sc.chPrev != '\\'))) {
						// outside parentheses these characters end the expression:
						// switch back to the saved string style and re-examine the character there
						fmtPart = (sc.ch == '}') ? FormatStringPart::End : FormatStringPart::FormatSpec;
						sc.ChangeState(state.state);
						continue;
					}
				} else if (visibleChars == 0 && (sc.ch == '}' || sc.ch == ']' || sc.ch == ')')) {
					// line starts with a closing bracket
					lineState |= PyLineStateMaskCloseBrace;
				}
			}
		}

		// measure indentation until the first visible character of the line
		if (visibleChars == 0) {
			if (sc.ch == ' ') {
				++indentCount;
			} else if (sc.ch == '\t') {
				indentCount = GetTabIndentCount(indentCount);
			} else if (!isspacechar(sc.ch)) {
				visibleChars++;
			}
		}
		if (sc.atLineEnd) {
			// store the line state and reset the per-line bookkeeping
			if (!nestedState.empty()) {
				lineState = PyLineStateStringInterpolation | PyLineStateMaskTripleQuote;
			} else if (IsTripleString(sc.state)) {
				lineState = PyLineStateMaskTripleQuote;
			} else if (lineState == 0 && visibleChars == 0) {
				lineState = PyLineStateMaskEmptyLine;
			}
			lineState |= (indentCount << 16) | (commentLevel << 8);
			styler.SetLineState(sc.currentLine, lineState);
			lineState = 0;
			kwType = KeywordType::None;
			fmtPart = FormatStringPart::None;
			visibleChars = 0;
			indentCount = 0;
		}
		sc.Forward();
	}

	sc.Complete();
}

}

// Lexer module object for Nim, built from the SCLEX_NIM id, the ColouriseNimDoc
// lexer function, the name "nim" and FoldPyDoc.
// NOTE(review): FoldPyDoc is presumably the indentation based folder shared with
// the Python lexer (the line states written by ColouriseNimDoc use PyLineState* flags) -- confirm.
extern const LexerModule lmNim(SCLEX_NIM, ColouriseNimDoc, "nim", FoldPyDoc);
